Load the required packages and the data.
# Prepare stuff
# Relative paths used further down (e.g. "datasets/train_numeric.rds" and
# "Laurae/date_feat_lut_V2.txt") are resolved against this directory.
# NOTE(review): hard-coded, machine-specific path. When this runs inside a
# notebook chunk the change only lasts until the chunk finishes (see the
# notebook message printed right after this call) -- consider knitr's
# root.dir option if the code is ever split over several chunks.
setwd("E:/")
The working directory was changed to E:/ inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the the working directory for notebook chunks.
library(recommenderlab)
Loading required package: Matrix
Loading required package: arules
Attaching package: ‘arules’
The following objects are masked from ‘package:base’:
abbreviate, write
Loading required package: proxy
Attaching package: ‘proxy’
The following object is masked from ‘package:Matrix’:
as.matrix
The following objects are masked from ‘package:stats’:
as.dist, dist
The following object is masked from ‘package:base’:
as.matrix
Loading required package: registry
library(data.table)
data.table 1.9.7 IN DEVELOPMENT built 2016-10-04 15:45:32 UTC; travis
unable to identify current timezone 'C':
please set environment variable 'TZ' The fastest way to learn (by data.table authors): https://www.datacamp.com/courses/data-analysis-the-data-table-way
Documentation: ?data.table, example(data.table) and browseVignettes("data.table")
Release notes, videos and slides: http://r-datatable.com
library(infotheo)
Attaching package: ‘infotheo’
The following object is masked from ‘package:arules’:
discretize
library(DT)
library(R.utils)
Loading required package: R.oo
Loading required package: R.methodsS3
R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
R.oo v1.20.0 (2016-02-17) successfully loaded. See ?R.oo for help.
Attaching package: ‘R.oo’
The following objects are masked from ‘package:methods’:
getClasses, getMethods
The following objects are masked from ‘package:base’:
attach, detach, gc, load, save
R.utils v2.4.0 (2016-09-13) successfully loaded. See ?R.utils for help.
Attaching package: ‘R.utils’
The following object is masked from ‘package:recommenderlab’:
evaluate
The following object is masked from ‘package:utils’:
timestamp
The following objects are masked from ‘package:base’:
cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(doParallel)
Loading required package: foreach
foreach: simple, scalable parallel programming from Revolution Analytics
Use Revolution R for scalability, fault tolerance and more.
http://www.revolutionanalytics.com
Loading required package: iterators
Loading required package: parallel
library(foreach)
library(rpart)
library(woe)
library(knitr)
library(rmarkdown)
library(ggplot2)
library(colorplaner)
library(ggiraph)
library(gridExtra)
# Load data
# Read the serialized training table from disk.
# NOTE(review): the object is called `numeric`, which shadows base::numeric()
# for variable look-ups in this session. Later code indexes it with
# data.table syntax (`with = FALSE`, `:=`), so it is presumably a
# data.table -- TODO confirm.
numeric <- readRDS("datasets/train_numeric.rds")
# Run a garbage collection and print the memory report.
gc(verbose = TRUE)
Garbage collection 91 = 42+10+39 (level 2) ...
88.7 Mbytes of cons cells used (63%)
8770.4 Mbytes of vectors used (73%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1660318 88.7 2637877 140.9 2164898 115.7
Vcells 1149553334 8770.4 1568815419 11969.2 1149560573 8770.5
# Prepare data
# Keep the `Response` column in its own vector; it is attached to every
# batch as the `target` column in the Information Value loops further down.
target <- numeric$Response
# Run a garbage collection and print the memory report.
gc(verbose = TRUE)
Garbage collection 92 = 42+10+40 (level 2) ...
88.8 Mbytes of cons cells used (63%)
8770.5 Mbytes of vectors used (61%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1662535 88.8 2637877 140.9 2164898 115.7
Vcells 1149554173 8770.5 1882658502 14363.6 1149579090 8770.6
# Feature look-up table: drop the rows without a numeric feature name, keep
# five columns and give them shorter names (new name = original column).
lut_columns <- c(
  Feature = "name_num",
  Station = "station_V2",
  Line = "line",
  Orig_St = "station",
  Feat_Nb = "feature_nr"
)
feature_names <- fread("Laurae/date_feat_lut_V2.txt", header = TRUE)
feature_names <- feature_names[name_num != "", unname(lut_columns), with = FALSE]
setnames(feature_names, names(lut_columns))
# Run a garbage collection and print the memory report.
gc(verbose = TRUE)
Garbage collection 93 = 42+10+41 (level 2) ...
90.2 Mbytes of cons cells used (64%)
8770.9 Mbytes of vectors used (61%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1687620 90.2 2637877 140.9 2164898 115.7
Vcells 1149617258 8770.9 1882658502 14363.6 1149998151 8773.8
# Names of the features to keep, taken from column `x` of the CSV file.
used_columns <- fread("E:/Laurae/TimeSeries/used_columns.csv")$x
# Restrict the table to those columns; `with = FALSE` makes data.table treat
# `used_columns` as a vector of column names instead of an expression.
numeric <- numeric[, used_columns, with = FALSE]
# Run a garbage collection and print the memory report.
gc(verbose = TRUE)
Garbage collection 95 = 42+10+43 (level 2) ...
90.2 Mbytes of cons cells used (64%)
7669.1 Mbytes of vectors used (44%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1688023 90.2 2637877 140.9 2164898 115.7
Vcells 1005194571 7669.1 2260511758 17246.4 2152269091 16420.6
# For every retained feature, append a "<feature>_residuals" column read from
# that feature's time-series CSV file.
# The number of rows to keep is taken from the table itself rather than from
# a hard-coded constant, so the loop keeps working if the table changes size.
n_rows <- nrow(numeric)
for (i in used_columns) {
  j <- paste0(i, "_residuals")
  ts_file <- paste0("E:/Laurae/TimeSeries/", i, "_TS.csv")
  # Only the second column of the file is read; the code expects it to be
  # named "Residuals".
  ts_residuals <- fread(ts_file, header = TRUE, select = 2, showProgress = FALSE)$Residuals
  # Keep the first `n_rows` values (padded with NA if the file is shorter)
  # and add them to the table by reference.
  numeric[, (j) := ts_residuals[seq_len(n_rows)]]
  cat("Loaded ", i, ".\n")
}
Loaded L0_S0_F0 .
Loaded L0_S0_F2 .
Loaded L0_S0_F4 .
Loaded L0_S0_F6 .
Loaded L0_S0_F8 .
Loaded L0_S0_F10 .
Loaded L0_S0_F12 .
Loaded L0_S0_F14 .
Loaded L0_S0_F16 .
Loaded L0_S0_F18 .
Loaded L0_S0_F20 .
Loaded L0_S0_F22 .
Loaded L0_S1_F24 .
Loaded L0_S1_F28 .
Loaded L0_S2_F36 .
Loaded L0_S2_F44 .
Loaded L0_S2_F48 .
Loaded L0_S2_F60 .
Loaded L0_S2_F64 .
Loaded L0_S3_F72 .
Loaded L0_S3_F80 .
Loaded L0_S3_F84 .
Loaded L0_S3_F96 .
Loaded L0_S3_F100 .
Loaded L0_S4_F104 .
Loaded L0_S4_F109 .
Loaded L0_S5_F114 .
Loaded L0_S5_F116 .
Loaded L0_S6_F122 .
Loaded L0_S6_F132 .
Loaded L0_S7_F138 .
Loaded L0_S7_F142 .
Loaded L0_S8_F146 .
Loaded L0_S8_F149 .
Loaded L0_S9_F155 .
Loaded L0_S9_F160 .
Loaded L0_S9_F165 .
Loaded L0_S9_F170 .
Loaded L0_S9_F180 .
Loaded L0_S9_F185 .
Loaded L0_S9_F190 .
Loaded L0_S9_F195 .
Loaded L0_S9_F200 .
Loaded L0_S9_F205 .
Loaded L0_S9_F210 .
Loaded L0_S10_F219 .
Loaded L0_S10_F224 .
Loaded L0_S10_F229 .
Loaded L0_S10_F234 .
Loaded L0_S10_F244 .
Loaded L0_S10_F249 .
Loaded L0_S10_F254 .
Loaded L0_S10_F259 .
Loaded L0_S10_F264 .
Loaded L0_S10_F269 .
Loaded L0_S10_F274 .
Loaded L0_S11_F282 .
Loaded L0_S11_F286 .
Loaded L0_S11_F290 .
Loaded L0_S11_F294 .
Loaded L0_S11_F302 .
Loaded L0_S11_F306 .
Loaded L0_S11_F310 .
Loaded L0_S11_F314 .
Loaded L0_S11_F318 .
Loaded L0_S11_F322 .
Loaded L0_S11_F326 .
Loaded L0_S12_F330 .
Loaded L0_S12_F332 .
Loaded L0_S12_F334 .
Loaded L0_S12_F336 .
Loaded L0_S12_F338 .
Loaded L0_S12_F342 .
Loaded L0_S12_F344 .
Loaded L0_S12_F346 .
Loaded L0_S12_F348 .
Loaded L0_S12_F350 .
Loaded L0_S12_F352 .
Loaded L0_S13_F354 .
Loaded L0_S13_F356 .
Loaded L0_S14_F362 .
Loaded L0_S14_F370 .
Loaded L0_S14_F374 .
Loaded L0_S14_F386 .
Loaded L0_S14_F390 .
Loaded L0_S15_F397 .
Loaded L0_S15_F403 .
Loaded L0_S15_F406 .
Loaded L0_S15_F415 .
Loaded L0_S15_F418 .
Loaded L0_S16_F421 .
Loaded L0_S16_F426 .
Loaded L0_S17_F431 .
Loaded L0_S17_F433 .
Loaded L0_S18_F439 .
Loaded L0_S18_F449 .
Loaded L0_S19_F455 .
Loaded L0_S19_F459 .
Loaded L0_S20_F466 .
Loaded L0_S21_F472 .
Loaded L0_S21_F477 .
Loaded L0_S21_F482 .
Loaded L0_S21_F487 .
Loaded L0_S21_F497 .
Loaded L0_S21_F502 .
Loaded L0_S21_F507 .
Loaded L0_S21_F512 .
Loaded L0_S21_F517 .
Loaded L0_S21_F522 .
Loaded L0_S21_F527 .
Loaded L0_S21_F532 .
Loaded L0_S21_F537 .
Loaded L0_S22_F546 .
Loaded L0_S22_F551 .
Loaded L0_S22_F556 .
Loaded L0_S22_F561 .
Loaded L0_S22_F571 .
Loaded L0_S22_F576 .
Loaded L0_S22_F581 .
Loaded L0_S22_F586 .
Loaded L0_S22_F591 .
Loaded L0_S22_F596 .
Loaded L0_S22_F601 .
Loaded L0_S22_F606 .
Loaded L0_S22_F611 .
Loaded L0_S23_F619 .
Loaded L0_S23_F623 .
Loaded L0_S23_F627 .
Loaded L0_S23_F631 .
Loaded L0_S23_F639 .
Loaded L0_S23_F643 .
Loaded L0_S23_F647 .
Loaded L0_S23_F651 .
Loaded L0_S23_F655 .
Loaded L0_S23_F659 .
Loaded L0_S23_F663 .
Loaded L0_S23_F667 .
Loaded L0_S23_F671 .
Loaded L1_S24_F679 .
Loaded L1_S24_F683 .
Loaded L1_S24_F687 .
Loaded L1_S24_F691 .
Loaded L1_S24_F700 .
Loaded L1_S24_F728 .
Loaded L1_S24_F733 .
Loaded L1_S24_F800 .
Loaded L1_S24_F802 .
Loaded L1_S24_F806 .
Loaded L1_S24_F808 .
Loaded L1_S24_F810 .
Loaded L1_S24_F812 .
Loaded L1_S24_F814 .
Loaded L1_S24_F816 .
Loaded L1_S24_F829 .
Loaded L1_S24_F834 .
Loaded L1_S24_F844 .
Loaded L1_S24_F857 .
Loaded L1_S24_F862 .
Loaded L1_S24_F867 .
Loaded L1_S24_F872 .
Loaded L1_S24_F877 .
Loaded L1_S24_F882 .
Loaded L1_S24_F887 .
Loaded L1_S24_F892 .
Loaded L1_S24_F897 .
Loaded L1_S24_F902 .
Loaded L1_S24_F907 .
Loaded L1_S24_F920 .
Loaded L1_S24_F925 .
Loaded L1_S24_F935 .
Loaded L1_S24_F948 .
Loaded L1_S24_F953 .
Loaded L1_S24_F958 .
Loaded L1_S24_F963 .
Loaded L1_S24_F968 .
Loaded L1_S24_F973 .
Loaded L1_S24_F978 .
Loaded L1_S24_F983 .
Loaded L1_S24_F988 .
Loaded L1_S24_F993 .
Loaded L1_S24_F998 .
Loaded L1_S24_F1000 .
Loaded L1_S24_F1002 .
Loaded L1_S24_F1004 .
Loaded L1_S24_F1006 .
Loaded L1_S24_F1008 .
Loaded L1_S24_F1010 .
Loaded L1_S24_F1012 .
Loaded L1_S24_F1014 .
Loaded L1_S24_F1016 .
Loaded L1_S24_F1021 .
Loaded L1_S24_F1026 .
Loaded L1_S24_F1031 .
Loaded L1_S24_F1036 .
Loaded L1_S24_F1041 .
Loaded L1_S24_F1046 .
Loaded L1_S24_F1056 .
Loaded L1_S24_F1068 .
Loaded L1_S24_F1072 .
Loaded L1_S24_F1079 .
Loaded L1_S24_F1083 .
Loaded L1_S24_F1087 .
Loaded L1_S24_F1094 .
Loaded L1_S24_F1098 .
Loaded L1_S24_F1102 .
Loaded L1_S24_F1106 .
Loaded L1_S24_F1110 .
Loaded L1_S24_F1118 .
Loaded L1_S24_F1122 .
Loaded L1_S24_F1126 .
Loaded L1_S24_F1130 .
Loaded L1_S24_F1134 .
Loaded L1_S24_F1145 .
Loaded L1_S24_F1148 .
Loaded L1_S24_F1161 .
Loaded L1_S24_F1166 .
Loaded L1_S24_F1170 .
Loaded L1_S24_F1172 .
Loaded L1_S24_F1174 .
Loaded L1_S24_F1176 .
Loaded L1_S24_F1180 .
Loaded L1_S24_F1184 .
Loaded L1_S24_F1197 .
Loaded L1_S24_F1202 .
Loaded L1_S24_F1212 .
Loaded L1_S24_F1225 .
Loaded L1_S24_F1230 .
Loaded L1_S24_F1235 .
Loaded L1_S24_F1240 .
Loaded L1_S24_F1245 .
Loaded L1_S24_F1250 .
Loaded L1_S24_F1255 .
Loaded L1_S24_F1260 .
Loaded L1_S24_F1265 .
Loaded L1_S24_F1270 .
Loaded L1_S24_F1275 .
Loaded L1_S24_F1293 .
Loaded L1_S24_F1303 .
Loaded L1_S24_F1316 .
Loaded L1_S24_F1321 .
Loaded L1_S24_F1326 .
Loaded L1_S24_F1331 .
Loaded L1_S24_F1336 .
Loaded L1_S24_F1341 .
Loaded L1_S24_F1346 .
Loaded L1_S24_F1351 .
Loaded L1_S24_F1356 .
Loaded L1_S24_F1361 .
Loaded L1_S24_F1366 .
Loaded L1_S24_F1371 .
Loaded L1_S24_F1376 .
Loaded L1_S24_F1381 .
Loaded L1_S24_F1386 .
Loaded L1_S24_F1391 .
Loaded L1_S24_F1396 .
Loaded L1_S24_F1401 .
Loaded L1_S24_F1406 .
Loaded L1_S24_F1411 .
Loaded L1_S24_F1416 .
Loaded L1_S24_F1421 .
Loaded L1_S24_F1426 .
Loaded L1_S24_F1431 .
Loaded L1_S24_F1436 .
Loaded L1_S24_F1441 .
Loaded L1_S24_F1446 .
Loaded L1_S24_F1451 .
Loaded L1_S24_F1463 .
Loaded L1_S24_F1467 .
Loaded L1_S24_F1474 .
Loaded L1_S24_F1478 .
Loaded L1_S24_F1482 .
Loaded L1_S24_F1490 .
Loaded L1_S24_F1494 .
Loaded L1_S24_F1498 .
Loaded L1_S24_F1502 .
Loaded L1_S24_F1506 .
Loaded L1_S24_F1512 .
Loaded L1_S24_F1514 .
Loaded L1_S24_F1516 .
Loaded L1_S24_F1518 .
Loaded L1_S24_F1520 .
Loaded L1_S24_F1539 .
Loaded L1_S24_F1544 .
Loaded L1_S24_F1565 .
Loaded L1_S24_F1567 .
Loaded L1_S24_F1569 .
Loaded L1_S24_F1571 .
Loaded L1_S24_F1573 .
Loaded L1_S24_F1575 .
Loaded L1_S24_F1578 .
Loaded L1_S24_F1581 .
Loaded L1_S24_F1594 .
Loaded L1_S24_F1599 .
Loaded L1_S24_F1604 .
Loaded L1_S24_F1609 .
Loaded L1_S24_F1622 .
Loaded L1_S24_F1627 .
Loaded L1_S24_F1632 .
Loaded L1_S24_F1637 .
Loaded L1_S24_F1642 .
Loaded L1_S24_F1647 .
Loaded L1_S24_F1652 .
Loaded L1_S24_F1657 .
Loaded L1_S24_F1662 .
Loaded L1_S24_F1667 .
Loaded L1_S24_F1672 .
Loaded L1_S24_F1685 .
Loaded L1_S24_F1690 .
Loaded L1_S24_F1695 .
Loaded L1_S24_F1700 .
Loaded L1_S24_F1713 .
Loaded L1_S24_F1718 .
Loaded L1_S24_F1723 .
Loaded L1_S24_F1728 .
Loaded L1_S24_F1733 .
Loaded L1_S24_F1738 .
Loaded L1_S24_F1743 .
Loaded L1_S24_F1748 .
Loaded L1_S24_F1753 .
Loaded L1_S24_F1758 .
Loaded L1_S24_F1763 .
Loaded L1_S24_F1768 .
Loaded L1_S24_F1773 .
Loaded L1_S24_F1778 .
Loaded L1_S24_F1783 .
Loaded L1_S24_F1788 .
Loaded L1_S24_F1793 .
Loaded L1_S24_F1798 .
Loaded L1_S24_F1803 .
Loaded L1_S24_F1808 .
Loaded L1_S24_F1810 .
Loaded L1_S24_F1812 .
Loaded L1_S24_F1814 .
Loaded L1_S24_F1816 .
Loaded L1_S24_F1818 .
Loaded L1_S24_F1820 .
Loaded L1_S24_F1822 .
Loaded L1_S24_F1824 .
Loaded L1_S24_F1829 .
Loaded L1_S24_F1831 .
Loaded L1_S24_F1834 .
Loaded L1_S24_F1836 .
Loaded L1_S24_F1838 .
Loaded L1_S24_F1842 .
Loaded L1_S24_F1844 .
Loaded L1_S24_F1846 .
Loaded L1_S24_F1848 .
Loaded L1_S24_F1850 .
Loaded L1_S25_F1855 .
Loaded L1_S25_F1858 .
Loaded L1_S25_F1865 .
Loaded L1_S25_F1869 .
Loaded L1_S25_F1881 .
Loaded L1_S25_F1885 .
Loaded L1_S25_F1890 .
Loaded L1_S25_F1892 .
Loaded L1_S25_F1894 .
Loaded L1_S25_F1896 .
Loaded L1_S25_F1900 .
Loaded L1_S25_F1909 .
Loaded L1_S25_F1914 .
Loaded L1_S25_F1919 .
Loaded L1_S25_F1924 .
Loaded L1_S25_F1929 .
Loaded L1_S25_F1938 .
Loaded L1_S25_F1943 .
Loaded L1_S25_F1948 .
Loaded L1_S25_F1953 .
Loaded L1_S25_F1958 .
Loaded L1_S25_F1963 .
Loaded L1_S25_F1968 .
Loaded L1_S25_F1973 .
Loaded L1_S25_F1978 .
Loaded L1_S25_F1987 .
Loaded L1_S25_F1992 .
Loaded L1_S25_F1997 .
Loaded L1_S25_F2002 .
Loaded L1_S25_F2007 .
Loaded L1_S25_F2016 .
Loaded L1_S25_F2021 .
Loaded L1_S25_F2026 .
Loaded L1_S25_F2031 .
Loaded L1_S25_F2036 .
Loaded L1_S25_F2046 .
Loaded L1_S25_F2051 .
Loaded L1_S25_F2056 .
Loaded L1_S25_F2061 .
Loaded L1_S25_F2066 .
Loaded L1_S25_F2071 .
Loaded L1_S25_F2076 .
Loaded L1_S25_F2081 .
Loaded L1_S25_F2086 .
Loaded L1_S25_F2091 .
Loaded L1_S25_F2096 .
Loaded L1_S25_F2101 .
Loaded L1_S25_F2106 .
Loaded L1_S25_F2111 .
Loaded L1_S25_F2116 .
Loaded L1_S25_F2121 .
Loaded L1_S25_F2126 .
Loaded L1_S25_F2131 .
Loaded L1_S25_F2136 .
Loaded L1_S25_F2144 .
Loaded L1_S25_F2147 .
Loaded L1_S25_F2152 .
Loaded L1_S25_F2155 .
Loaded L1_S25_F2158 .
Loaded L1_S25_F2161 .
Loaded L1_S25_F2164 .
Loaded L1_S25_F2167 .
Loaded L1_S25_F2170 .
Loaded L1_S25_F2173 .
Loaded L1_S25_F2176 .
Loaded L1_S25_F2193 .
Loaded L1_S25_F2196 .
Loaded L1_S25_F2199 .
Loaded L1_S25_F2217 .
Loaded L1_S25_F2220 .
Loaded L1_S25_F2223 .
Loaded L1_S25_F2231 .
Loaded L1_S25_F2233 .
Loaded L1_S25_F2237 .
Loaded L1_S25_F2239 .
Loaded L1_S25_F2241 .
Loaded L1_S25_F2243 .
Loaded L1_S25_F2245 .
Loaded L1_S25_F2247 .
Loaded L1_S25_F2249 .
Loaded L1_S25_F2258 .
Loaded L1_S25_F2263 .
Loaded L1_S25_F2268 .
Loaded L1_S25_F2273 .
Loaded L1_S25_F2278 .
Loaded L1_S25_F2287 .
Loaded L1_S25_F2292 .
Loaded L1_S25_F2297 .
Loaded L1_S25_F2302 .
Loaded L1_S25_F2307 .
Loaded L1_S25_F2312 .
Loaded L1_S25_F2317 .
Loaded L1_S25_F2322 .
Loaded L1_S25_F2327 .
Loaded L1_S25_F2336 .
Loaded L1_S25_F2341 .
Loaded L1_S25_F2346 .
Loaded L1_S25_F2351 .
Loaded L1_S25_F2356 .
Loaded L1_S25_F2365 .
Loaded L1_S25_F2370 .
Loaded L1_S25_F2375 .
Loaded L1_S25_F2380 .
Loaded L1_S25_F2385 .
Loaded L1_S25_F2390 .
Loaded L1_S25_F2395 .
Loaded L1_S25_F2400 .
Loaded L1_S25_F2405 .
Loaded L1_S25_F2408 .
Loaded L1_S25_F2411 .
Loaded L1_S25_F2414 .
Loaded L1_S25_F2417 .
Loaded L1_S25_F2420 .
Loaded L1_S25_F2423 .
Loaded L1_S25_F2426 .
Loaded L1_S25_F2429 .
Loaded L1_S25_F2431 .
Loaded L1_S25_F2433 .
Loaded L1_S25_F2435 .
Loaded L1_S25_F2437 .
Loaded L1_S25_F2439 .
Loaded L1_S25_F2441 .
Loaded L1_S25_F2443 .
Loaded L1_S25_F2449 .
Loaded L1_S25_F2451 .
Loaded L1_S25_F2454 .
Loaded L1_S25_F2456 .
Loaded L1_S25_F2458 .
Loaded L1_S25_F2460 .
Loaded L1_S25_F2462 .
Loaded L1_S25_F2464 .
Loaded L1_S25_F2466 .
Loaded L1_S25_F2468 .
Loaded L1_S25_F2484 .
Loaded L1_S25_F2487 .
Loaded L1_S25_F2490 .
Loaded L1_S25_F2498 .
Loaded L1_S25_F2500 .
Loaded L1_S25_F2504 .
Loaded L1_S25_F2506 .
Loaded L1_S25_F2508 .
Loaded L1_S25_F2514 .
Loaded L1_S25_F2516 .
Loaded L1_S25_F2525 .
Loaded L1_S25_F2530 .
Loaded L1_S25_F2535 .
Loaded L1_S25_F2545 .
Loaded L1_S25_F2554 .
Loaded L1_S25_F2559 .
Loaded L1_S25_F2564 .
Loaded L1_S25_F2569 .
Loaded L1_S25_F2574 .
Loaded L1_S25_F2579 .
Loaded L1_S25_F2584 .
Loaded L1_S25_F2589 .
Loaded L1_S25_F2594 .
Loaded L1_S25_F2603 .
Loaded L1_S25_F2608 .
Loaded L1_S25_F2613 .
Loaded L1_S25_F2623 .
Loaded L1_S25_F2632 .
Loaded L1_S25_F2637 .
Loaded L1_S25_F2642 .
Loaded L1_S25_F2647 .
Loaded L1_S25_F2652 .
Loaded L1_S25_F2657 .
Loaded L1_S25_F2662 .
Loaded L1_S25_F2667 .
Loaded L1_S25_F2672 .
Loaded L1_S25_F2677 .
Loaded L1_S25_F2682 .
Loaded L1_S25_F2687 .
Loaded L1_S25_F2692 .
Loaded L1_S25_F2697 .
Loaded L1_S25_F2702 .
Loaded L1_S25_F2707 .
Loaded L1_S25_F2712 .
Loaded L1_S25_F2714 .
Loaded L1_S25_F2716 .
Loaded L1_S25_F2718 .
Loaded L1_S25_F2720 .
Loaded L1_S25_F2722 .
Loaded L1_S25_F2724 .
Loaded L1_S25_F2726 .
Loaded L1_S25_F2732 .
Loaded L1_S25_F2734 .
Loaded L1_S25_F2737 .
Loaded L1_S25_F2739 .
Loaded L1_S25_F2741 .
Loaded L1_S25_F2743 .
Loaded L1_S25_F2745 .
Loaded L1_S25_F2747 .
Loaded L1_S25_F2749 .
Loaded L1_S25_F2751 .
Loaded L1_S25_F2767 .
Loaded L1_S25_F2770 .
Loaded L1_S25_F2773 .
Loaded L1_S25_F2781 .
Loaded L1_S25_F2783 .
Loaded L1_S25_F2787 .
Loaded L1_S25_F2789 .
Loaded L1_S25_F2791 .
Loaded L1_S25_F2793 .
Loaded L1_S25_F2795 .
Loaded L1_S25_F2797 .
Loaded L1_S25_F2799 .
Loaded L1_S25_F2808 .
Loaded L1_S25_F2813 .
Loaded L1_S25_F2818 .
Loaded L1_S25_F2823 .
Loaded L1_S25_F2828 .
Loaded L1_S25_F2837 .
Loaded L1_S25_F2842 .
Loaded L1_S25_F2847 .
Loaded L1_S25_F2852 .
Loaded L1_S25_F2867 .
Loaded L1_S25_F2872 .
Loaded L1_S25_F2877 .
Loaded L1_S25_F2886 .
Loaded L1_S25_F2891 .
Loaded L1_S25_F2896 .
Loaded L1_S25_F2901 .
Loaded L1_S25_F2906 .
Loaded L1_S25_F2915 .
Loaded L1_S25_F2920 .
Loaded L1_S25_F2925 .
Loaded L1_S25_F2930 .
Loaded L1_S25_F2935 .
Loaded L1_S25_F2940 .
Loaded L1_S25_F2945 .
Loaded L1_S25_F2950 .
Loaded L1_S25_F2955 .
Loaded L1_S25_F2960 .
Loaded L1_S25_F2965 .
Loaded L1_S25_F2970 .
Loaded L1_S25_F2975 .
Loaded L1_S25_F2980 .
Loaded L1_S25_F2985 .
Loaded L1_S25_F2990 .
Loaded L1_S25_F2995 .
Loaded L1_S25_F2997 .
Loaded L1_S25_F2999 .
Loaded L1_S25_F3001 .
Loaded L1_S25_F3003 .
Loaded L1_S25_F3005 .
Loaded L1_S25_F3007 .
Loaded L1_S25_F3009 .
Loaded L1_S25_F3015 .
Loaded L1_S25_F3017 .
Loaded L1_S25_F3020 .
Loaded L1_S25_F3022 .
Loaded L1_S25_F3024 .
Loaded L1_S25_F3026 .
Loaded L1_S25_F3028 .
Loaded L1_S25_F3030 .
Loaded L1_S25_F3032 .
Loaded L1_S25_F3034 .
Loaded L2_S26_F3036 .
Loaded L2_S26_F3040 .
Loaded L2_S26_F3047 .
Loaded L2_S26_F3051 .
Loaded L2_S26_F3055 .
Loaded L2_S26_F3062 .
Loaded L2_S26_F3069 .
Loaded L2_S26_F3073 .
Loaded L2_S26_F3077 .
Loaded L2_S26_F3106 .
Loaded L2_S26_F3113 .
Loaded L2_S26_F3117 .
Loaded L2_S26_F3121 .
Loaded L2_S26_F3125 .
Loaded L2_S27_F3129 .
Loaded L2_S27_F3133 .
Loaded L2_S27_F3140 .
Loaded L2_S27_F3144 .
Loaded L2_S27_F3148 .
Loaded L2_S27_F3155 .
Loaded L2_S27_F3162 .
Loaded L2_S27_F3166 .
Loaded L2_S27_F3170 .
Loaded L2_S27_F3199 .
Loaded L2_S27_F3206 .
Loaded L2_S27_F3210 .
Loaded L2_S27_F3214 .
Loaded L2_S27_F3218 .
Loaded L2_S28_F3222 .
Loaded L2_S28_F3226 .
Loaded L2_S28_F3233 .
Loaded L2_S28_F3237 .
Loaded L2_S28_F3241 .
Loaded L2_S28_F3248 .
Loaded L2_S28_F3255 .
Loaded L2_S28_F3259 .
Loaded L2_S28_F3263 .
Loaded L2_S28_F3292 .
Loaded L2_S28_F3299 .
Loaded L2_S28_F3303 .
Loaded L2_S28_F3307 .
Loaded L2_S28_F3311 .
Loaded L3_S29_F3315 .
Loaded L3_S29_F3318 .
Loaded L3_S29_F3321 .
Loaded L3_S29_F3324 .
Loaded L3_S29_F3327 .
Loaded L3_S29_F3330 .
Loaded L3_S29_F3333 .
Loaded L3_S29_F3336 .
Loaded L3_S29_F3339 .
Loaded L3_S29_F3342 .
Loaded L3_S29_F3345 .
Loaded L3_S29_F3348 .
Loaded L3_S29_F3351 .
Loaded L3_S29_F3354 .
Loaded L3_S29_F3357 .
Loaded L3_S29_F3367 .
Loaded L3_S29_F3370 .
Loaded L3_S29_F3373 .
Loaded L3_S29_F3376 .
Loaded L3_S29_F3379 .
Loaded L3_S29_F3382 .
Loaded L3_S29_F3385 .
Loaded L3_S29_F3388 .
Loaded L3_S29_F3395 .
Loaded L3_S29_F3401 .
Loaded L3_S29_F3404 .
Loaded L3_S29_F3407 .
Loaded L3_S29_F3412 .
Loaded L3_S29_F3421 .
Loaded L3_S29_F3424 .
Loaded L3_S29_F3427 .
Loaded L3_S29_F3430 .
Loaded L3_S29_F3433 .
Loaded L3_S29_F3436 .
Loaded L3_S29_F3439 .
Loaded L3_S29_F3442 .
Loaded L3_S29_F3449 .
Loaded L3_S29_F3452 .
Loaded L3_S29_F3455 .
Loaded L3_S29_F3458 .
Loaded L3_S29_F3461 .
Loaded L3_S29_F3467 .
Loaded L3_S29_F3473 .
Loaded L3_S29_F3476 .
Loaded L3_S29_F3479 .
Loaded L3_S29_F3482 .
Loaded L3_S29_F3485 .
Loaded L3_S29_F3488 .
Loaded L3_S29_F3491 .
Loaded L3_S30_F3494 .
Loaded L3_S30_F3499 .
Loaded L3_S30_F3504 .
Loaded L3_S30_F3514 .
Loaded L3_S30_F3519 .
Loaded L3_S30_F3524 .
Loaded L3_S30_F3529 .
Loaded L3_S30_F3534 .
Loaded L3_S30_F3539 .
Loaded L3_S30_F3544 .
Loaded L3_S30_F3549 .
Loaded L3_S30_F3554 .
Loaded L3_S30_F3559 .
Loaded L3_S30_F3564 .
Loaded L3_S30_F3569 .
Loaded L3_S30_F3574 .
Loaded L3_S30_F3579 .
Loaded L3_S30_F3584 .
Loaded L3_S30_F3589 .
Loaded L3_S30_F3604 .
Loaded L3_S30_F3609 .
Loaded L3_S30_F3624 .
Loaded L3_S30_F3629 .
Loaded L3_S30_F3634 .
Loaded L3_S30_F3639 .
Loaded L3_S30_F3644 .
Loaded L3_S30_F3649 .
Loaded L3_S30_F3664 .
Loaded L3_S30_F3669 .
Loaded L3_S30_F3674 .
Loaded L3_S30_F3679 .
Loaded L3_S30_F3684 .
Loaded L3_S30_F3689 .
Loaded L3_S30_F3704 .
Loaded L3_S30_F3709 .
Loaded L3_S30_F3724 .
Loaded L3_S30_F3729 .
Loaded L3_S30_F3734 .
Loaded L3_S30_F3739 .
Loaded L3_S30_F3744 .
Loaded L3_S30_F3749 .
Loaded L3_S30_F3754 .
Loaded L3_S30_F3759 .
Loaded L3_S30_F3764 .
Loaded L3_S30_F3769 .
Loaded L3_S30_F3774 .
Loaded L3_S30_F3784 .
Loaded L3_S30_F3794 .
Loaded L3_S30_F3799 .
Loaded L3_S30_F3804 .
Loaded L3_S30_F3809 .
Loaded L3_S30_F3819 .
Loaded L3_S30_F3829 .
Loaded L3_S31_F3834 .
Loaded L3_S31_F3846 .
Loaded L3_S32_F3850 .
Loaded L3_S33_F3855 .
Loaded L3_S33_F3857 .
Loaded L3_S33_F3859 .
Loaded L3_S33_F3861 .
Loaded L3_S33_F3863 .
Loaded L3_S33_F3865 .
Loaded L3_S33_F3867 .
Loaded L3_S33_F3869 .
Loaded L3_S33_F3871 .
Loaded L3_S33_F3873 .
Loaded L3_S34_F3876 .
Loaded L3_S34_F3878 .
Loaded L3_S34_F3880 .
Loaded L3_S34_F3882 .
Loaded L3_S35_F3889 .
Loaded L3_S35_F3896 .
Loaded L3_S35_F3898 .
Loaded L3_S35_F3903 .
Loaded L3_S35_F3908 .
Loaded L3_S35_F3913 .
Loaded L3_S36_F3920 .
Loaded L3_S36_F3924 .
Loaded L3_S36_F3926 .
Loaded L3_S36_F3930 .
Loaded L3_S36_F3934 .
Loaded L3_S36_F3938 .
Loaded L3_S37_F3944 .
Loaded L3_S37_F3946 .
Loaded L3_S37_F3948 .
Loaded L3_S37_F3950 .
Loaded L3_S38_F3956 .
Loaded L3_S38_F3960 .
Loaded L3_S39_F3964 .
Loaded L3_S39_F3972 .
Loaded L3_S39_F3976 .
Loaded L3_S40_F3980 .
Loaded L3_S40_F3982 .
Loaded L3_S40_F3984 .
Loaded L3_S40_F3986 .
Loaded L3_S40_F3992 .
Loaded L3_S40_F3994 .
Loaded L3_S41_F3998 .
Loaded L3_S41_F4000 .
Loaded L3_S41_F4002 .
Loaded L3_S41_F4004 .
Loaded L3_S41_F4006 .
Loaded L3_S41_F4008 .
Loaded L3_S41_F4011 .
Loaded L3_S41_F4014 .
Loaded L3_S41_F4016 .
Loaded L3_S41_F4018 .
Loaded L3_S41_F4020 .
Loaded L3_S41_F4023 .
Loaded L3_S41_F4026 .
Loaded L3_S43_F4065 .
Loaded L3_S43_F4070 .
Loaded L3_S43_F4075 .
Loaded L3_S43_F4080 .
Loaded L3_S43_F4085 .
Loaded L3_S43_F4090 .
Loaded L3_S43_F4095 .
Loaded L3_S44_F4106 .
Loaded L3_S44_F4109 .
Loaded L3_S44_F4112 .
Loaded L3_S44_F4115 .
Loaded L3_S44_F4118 .
Loaded L3_S44_F4121 .
Loaded L3_S45_F4124 .
Loaded L3_S45_F4128 .
Loaded L3_S45_F4130 .
Loaded L3_S45_F4132 .
Loaded L3_S47_F4138 .
Loaded L3_S47_F4143 .
Loaded L3_S47_F4148 .
Loaded L3_S47_F4153 .
Loaded L3_S47_F4158 .
Loaded L3_S47_F4163 .
Loaded L3_S47_F4168 .
Loaded L3_S47_F4178 .
Loaded L3_S47_F4183 .
Loaded L3_S47_F4188 .
Loaded L3_S48_F4196 .
Loaded L3_S48_F4198 .
Loaded L3_S48_F4200 .
Loaded L3_S48_F4202 .
Loaded L3_S48_F4204 .
Loaded L3_S49_F4211 .
Loaded L3_S49_F4221 .
Loaded L3_S49_F4226 .
Loaded L3_S49_F4231 .
Loaded L3_S50_F4243 .
Loaded L3_S50_F4249 .
Loaded L3_S50_F4251 .
Loaded L3_S51_F4260 .
Loaded L3_S51_F4262 .
#saveRDS(data, "Laurae/NumericCMI_exact_best_grid/Train_features.rds")
#saveRDS(target, "E:/Laurae/NumericCMI_exact_best_grid/Train_target.rds")
Dictionary:
# Result table: one row per raw feature, in the same order as the first 847
# columns of `numeric`. The Information Value loops below fill IV_Raw and
# IV_TS by row position, so this order must be preserved.
ig_frame <- data.frame(Feature = colnames(numeric)[seq_len(847)], stringsAsFactors = FALSE)
# Previously computed per-feature statistics (provides Levels and NotMissing).
all_frame <- fread("E:/Laurae/NumericCMI_exact_best_grid/IG_2216feat.csv", data.table = FALSE)
# Look the statistics up with match() instead of merge(..., sort = FALSE):
# merge() does not guarantee the row order of its result (unmatched rows are
# appended at the end), which would silently misalign the positional
# assignments made later. Features absent from `all_frame` get NA.
feature_pos <- match(ig_frame$Feature, all_frame$Feature)
ig_frame$Levels <- all_frame$Levels[feature_pos]
ig_frame$NotMissing <- all_frame$NotMissing[feature_pos]
# Placeholders, filled in by the Information Value computations.
ig_frame$IV_Raw <- NA
ig_frame$IV_TS <- NA
# Parallel back-end: a cluster of `cores` workers registered for foreach.
cores <- 6
# Batch size: five features per worker and per batch.
# NOTE(review): the name `choose` shadows base::choose() for variable look-ups.
choose <- 5 * cores
mcl <- makeCluster(cores)
# Attach the packages needed by the loop bodies on every worker.
invisible(clusterEvalQ(mcl, {
  library("woe")
  library("data.table")
  library("rpart")
}))
registerDoParallel(cl = mcl)
# Magic function to coerce super quickly
# Turn a data.table into a plain data.frame by rewriting its attributes with
# data.table's setattr() instead of going through as.data.frame().
#
# x: a data.table; anything else raises an error.
# Returns `x` invisibly (the value of the last setattr() call).
#
# NOTE(review): data.table also exports a function called setDF; this
# definition takes precedence over it in the global environment -- confirm
# that this is intended.
setDF <- function(x) {
  if (!is.data.table(x)) {
    stop("x must be a data.table")
  }
  # Attributes to rewrite, applied in this order; a NULL value removes the
  # attribute.
  attr_updates <- list(
    row.names = .set_row_names(nrow(x)),
    class = "data.frame",
    sorted = NULL,
    .internal.selfref = NULL
  )
  for (attr_name in names(attr_updates)) {
    setattr(x, attr_name, attr_updates[[attr_name]])
  }
  invisible(x)
}
# Optimized for maximum speed
gc(verbose = TRUE)
Garbage collection 108 = 51+12+45 (level 2) ...
95.6 Mbytes of cons cells used (56%)
15320.9 Mbytes of vectors used (62%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1789626 95.6 3205452 171.2 3205452 171.2
Vcells 2008133165 15320.9 3255312930 24836.1 2260479265 17246.1
StartTime <- System$currentTimeMillis()
cat("Information Value (Raw) Job started on ", format(Sys.time(), "%a %b %d %Y %X"), ". \n", sep = "")
Information Value (Raw) Job started on Mon Oct 17 2016 12:08:38 AM.
# Information Value of the raw features.
# The first 847 columns of `numeric` are processed in batches of `choose`
# columns; each batch is exported to the workers of cluster `mcl` once and
# its columns are scored in parallel. Results go to ig_frame$IV_Raw at the
# positions of the batch's columns.
# NOTE(review): winProgressBar()/setWinProgressBar() exist on Windows only.
pb <- winProgressBar(title = "Information Value (Raw) computation", label = paste0("[", format(Sys.time(), "%a %b %d %Y %X"), "] Preparing computation..."), min = 0, max = 847, initial = 0, width = 520)
n_batches <- ceiling(847 / choose)
# Do loop
for (i in seq_len(n_batches)) {
  # Column positions handled by this batch (the last batch may be shorter)
  batch_idx <- ((i - 1) * choose + 1):min(i * choose, 847)
  # Prepare parallel loop: batch columns plus the label, as a data.frame
  mini_temp <- numeric[, batch_idx, with = FALSE]
  mini_temp$target <- target
  setDF(mini_temp)
  # Ship the batch to the workers explicitly; `.noexport` below stops foreach
  # from exporting it a second time.
  clusterExport(mcl, c("mini_temp"))
  # Parallel loop setup: one task per column of the batch, results in order
  ig_frame$IV_Raw[batch_idx] <- foreach(j = seq_along(batch_idx), .combine = "c", .inorder = TRUE, .noexport = "mini_temp") %dopar% {
    # Same seed for every feature; presumably to make the rpart
    # cross-validation (xval = 5) reproducible.
    set.seed(11111)
    whatever <- iv.mult(df = mini_temp,
                        y = "target",
                        vars = colnames(mini_temp)[j],
                        sql = FALSE,
                        topbin = TRUE,
                        tbpct = 0.0001,
                        verbose = FALSE,
                        rcontrol = rpart.control(minsplit = 100, cp = 0.00001, xval = 5))
    # An empty result counts as an Information Value of 0
    if (length(whatever) == 0) {
      0
    } else {
      sum(whatever[[1]]$miv, na.rm = TRUE)
    }
  }
  invisible(clusterEvalQ(mcl, gc(verbose = FALSE)))
  # Harvest statistics
  cat("Batch ", i, ": mean IV = ", mean(ig_frame$IV_Raw[batch_idx]), " \n", sep = "")
  CurrentTime <- System$currentTimeMillis()
  # The progress bar is not refreshed after the last batch
  if (i < n_batches) {
    elapsed_s <- (CurrentTime - StartTime) / 1000
    # Remaining batches times the average time per finished batch
    ETA <- ((847 / choose) - i) * (CurrentTime - StartTime) / i / 1000
    pb_title <- paste0("Information Value (Raw) computation [CPU=", sprintf("%07.2f", elapsed_s), "s | ETA=", sprintf("%07.2f", ETA), "s]")
    pb_iter <- elapsed_s / (i * choose)
    # The label previously showed "/ 2216" although the bar maximum and the
    # percentage are both based on 847 features.
    setWinProgressBar(pb, value = i * choose, title = pb_title, label = paste0("[", format(Sys.time(), "%X"), " | ", sprintf("%04.2f", pb_iter), " s/iter] Doing feature ", colnames(numeric)[i * choose], " (", sprintf("%04d", i * choose), " / 847 = ", sprintf("%05.2f", 100 * (i * choose) / 847), "%)..."))
  }
}
Batch 1: mean IV = 0.0260151
Batch 2: mean IV = 0.01882139
Batch 3: mean IV = 0.02608131
Batch 4: mean IV = 0.02133668
Batch 5: mean IV = 0.01235418
Batch 6: mean IV = 0.001723332
Batch 7: mean IV = 0.002133675
Batch 8: mean IV = 0.003542311
Batch 9: mean IV = 0.002831129
Batch 10: mean IV = 0.03711379
Batch 11: mean IV = 0.03486352
Batch 12: mean IV = 0.04003867
Batch 13: mean IV = 0.004349045
Batch 14: mean IV = 0.007304956
Batch 15: mean IV = 0.0005045246
Batch 16: mean IV = 0.0003688799
Batch 17: mean IV = 5.365315e-05
Batch 18: mean IV = 1.743335e-06
Batch 19: mean IV = 0.0008791858
Batch 20: mean IV = 0.0006374219
Batch 21: mean IV = 0.03552357
Batch 22: mean IV = 0.04726587
Batch 23: mean IV = 0.09383893
Batch 24: mean IV = 0.01609851
Batch 25: mean IV = 0.01982167
Batch 26: mean IV = 0.09125971
Batch 27: mean IV = 0.007076523
Batch 28: mean IV = 0.005951134
Batch 29: mean IV = 0.001055506
invisible(close(pb))
cat("Information Value (Raw) Computation time: ", sprintf("%07.2f", (System$currentTimeMillis() - StartTime) / 1000), "s. \n", sep = "")
Information Value (Raw) Computation time: 2372.89s.
cat("Information Value (Raw) Job over on ", format(Sys.time(), "%a %b %d %Y %X"), ". \n", sep = "")
Information Value (Raw) Job over on Mon Oct 17 2016 12:48:11 AM.
# Optimized for maximum speed
gc(verbose = TRUE)
Garbage collection 122 = 61+12+49 (level 2) ...
102.5 Mbytes of cons cells used (60%)
15602.6 Mbytes of vectors used (63%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1919235 102.5 3205452 171.2 3205452 171.2
Vcells 2045058987 15602.6 3255312930 24836.1 3052366883 23287.8
StartTime <- System$currentTimeMillis()
cat("Information Value (TS) Job started on ", format(Sys.time(), "%a %b %d %Y %X"), ". \n", sep = "")
Information Value (TS) Job started on Mon Oct 17 2016 01:23:16 AM.
# Information Value of the time-series residual features.
# Same batching scheme as for the raw features, applied to the columns at
# positions 848 and up of `numeric`; results go to ig_frame$IV_TS, in the row
# of the corresponding raw feature.
# NOTE(review): winProgressBar()/setWinProgressBar() exist on Windows only.
pb <- winProgressBar(title = "Information Value (TS) computation", label = paste0("[", format(Sys.time(), "%a %b %d %Y %X"), "] Preparing computation..."), min = 0, max = 847, initial = 0, width = 520)
n_batches <- ceiling(847 / choose)
# Do loop
for (i in seq_len(n_batches)) {
  # Rows of ig_frame filled by this batch, and the matching residual columns
  batch_idx <- ((i - 1) * choose + 1):min(i * choose, 847)
  ts_cols <- 847 + batch_idx
  # Prepare parallel loop: batch columns plus the label, as a data.frame
  mini_temp <- numeric[, ts_cols, with = FALSE]
  mini_temp$target <- target
  setDF(mini_temp)
  # Ship the batch to the workers explicitly; `.noexport` below stops foreach
  # from exporting it a second time.
  clusterExport(mcl, c("mini_temp"))
  # Parallel loop setup: one task per column of the batch, results in order
  ig_frame$IV_TS[batch_idx] <- foreach(j = seq_along(batch_idx), .combine = "c", .inorder = TRUE, .noexport = "mini_temp") %dopar% {
    set.seed(11111)
    whatever <- iv.mult(df = mini_temp,
                        y = "target",
                        vars = colnames(mini_temp)[j],
                        sql = FALSE,
                        topbin = TRUE,
                        tbpct = 0.0001,
                        verbose = FALSE,
                        rcontrol = rpart.control(minsplit = 100, cp = 0.00001, xval = 5))
    # An empty result counts as an Information Value of 0
    if (length(whatever) == 0) {
      0
    } else {
      sum(whatever[[1]]$miv, na.rm = TRUE)
    }
  }
  invisible(clusterEvalQ(mcl, gc(verbose = FALSE)))
  # Harvest statistics
  cat("Batch ", i, ": mean IV = ", mean(ig_frame$IV_TS[batch_idx]), " \n", sep = "")
  CurrentTime <- System$currentTimeMillis()
  # The progress bar is not refreshed after the last batch
  if (i < n_batches) {
    elapsed_s <- (CurrentTime - StartTime) / 1000
    ETA <- ((847 / choose) - i) * (CurrentTime - StartTime) / i / 1000
    pb_title <- paste0("Information Value (TS) computation [CPU=", sprintf("%07.2f", elapsed_s), "s | ETA=", sprintf("%07.2f", ETA), "s]")
    pb_iter <- elapsed_s / (i * choose)
    # NOTE(review): the label shows the raw column name at position
    # i * choose, not the residual column actually being processed.
    setWinProgressBar(pb, value = i * choose, title = pb_title, label = paste0("[", format(Sys.time(), "%X"), " | ", sprintf("%04.2f", pb_iter), " s/iter] Doing feature ", colnames(numeric)[i * choose], " (", sprintf("%04d", i * choose), " / 847 = ", sprintf("%05.2f", 100 * (i * choose) / 847), "%)..."))
  }
}
Batch 1: mean IV = 0.01582562
Batch 2: mean IV = 0.01209923
Batch 3: mean IV = 0.008088839
Batch 4: mean IV = 0.01302411
Batch 5: mean IV = 0.01074356
Batch 6: mean IV = 0.003403406
Batch 7: mean IV = 0.003527316
Batch 8: mean IV = 0.005118777
Batch 9: mean IV = 0.00337424
Batch 10: mean IV = 0.03309113
Batch 11: mean IV = 0.1269662
Batch 12: mean IV = 0.1041677
Batch 13: mean IV = 0.005665669
Batch 14: mean IV = 0.006770514
Batch 15: mean IV = 0.001296637
Batch 16: mean IV = 0.001698381
Batch 17: mean IV = 0.0004207886
Batch 18: mean IV = 0.0006988822
Batch 19: mean IV = 0.0009079932
Batch 20: mean IV = 0.001300575
Batch 21: mean IV = 0.09758265
Batch 22: mean IV = 0.04890238
Batch 23: mean IV = 0.08834347
Batch 24: mean IV = 0.0114363
Batch 25: mean IV = 0.009829268
Batch 26: mean IV = 0.09226964
Batch 27: mean IV = 0.006198878
Batch 28: mean IV = 0.01006495
Batch 29: mean IV = 0.0009639902
invisible(close(pb))
cat("Information Value (TS) Computation time: ", sprintf("%07.2f", (System$currentTimeMillis() - StartTime) / 1000), "s. \n", sep = "")
Information Value (TS) Computation time: 2495.42s.
cat("Information Value (TS) Job over on ", format(Sys.time(), "%a %b %d %Y %X"), ". \n", sep = "")
Information Value (TS) Job over on Mon Oct 17 2016 02:04:51 AM.
# Switch foreach back to sequential execution and release the workers.
registerDoSEQ()
stopCluster(mcl)
closeAllConnections()
# Final table: Information Values plus their ranks (1 = highest value) and a
# flag telling which of the two variants scored higher.
all_frame <- ig_frame
rank_offset <- nrow(all_frame) + 1
all_frame$RankIV_Raw <- rank_offset - rank(all_frame$IV_Raw, ties.method = "max")
all_frame$RankIV_TS <- rank_offset - rank(all_frame$IV_TS, ties.method = "max")
# Rows where the comparison is NA keep the empty label.
iv_winner <- rep("", nrow(all_frame))
iv_winner[which(all_frame$IV_Raw > all_frame$IV_TS)] <- "Raw"
iv_winner[which(all_frame$IV_Raw < all_frame$IV_TS)] <- "TS"
iv_winner[which(all_frame$IV_Raw == all_frame$IV_TS)] <- "None"
all_frame$TSvsRaw <- as.factor(iv_winner)
gc(verbose = TRUE)
Garbage collection 140 = 71+13+56 (level 2) ...
106.2 Mbytes of cons cells used (62%)
15395.8 Mbytes of vectors used (62%)
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 1987393 106.2 3205452 171.2 3205452 171.2
Vcells 2017951504 15395.8 3255312930 24836.1 3052366883 23287.8
fwrite(all_frame, "E:/Laurae/NumericCMI_exact_best_grid/IV_TSfeat.csv")
Summary:
# Interactive summary table of `all_frame`, initially sorted by column index 5
# in descending order, with in-cell bars on four columns.

# Adds a horizontal in-cell bar to one column of a DT table.
#   tbl:    table widget to decorate
#   column: name of the column to style
#   limits: value range mapped onto the bar length
#   colour: bar colour
add_cell_bar <- function(tbl, column, limits, colour) {
  formatStyle(tbl, column,
              background = styleColorBar(limits, colour),
              backgroundSize = "100% 90%",
              backgroundRepeat = "no-repeat",
              backgroundPosition = "center")
}

iv_table <- datatable(all_frame,
                      filter = "top",
                      class = "cell-border stripe",
                      plugins = "natural",
                      extensions = c("AutoFill",
                                     #"Buttons",
                                     "ColReorder",
                                     "KeyTable",
                                     "Responsive",
                                     "RowReorder"),
                      options = list(style = "bootstrap",
                                     pageLength = 20,
                                     lengthMenu = c(5, 10, 15, 20, 25, 50, 100, 250, 500, 1000),
                                     order = list(list(5, "desc")),
                                     autofill = TRUE,
                                     #dom = "Bfrtip",
                                     #buttons = c("copy", "csv", "excel", "pdf", "print"),
                                     colReorder = TRUE,
                                     keys = TRUE,
                                     rowReorder = TRUE,
                                     searchHighlight = TRUE,
                                     search = list(regex = TRUE, caseInsensitive = FALSE)))
# Bars: both Information Values and Levels are scaled to their observed
# range, NotMissing to [0, 1].
iv_table <- add_cell_bar(iv_table, "IV_Raw", range(all_frame$IV_Raw, na.rm = TRUE, finite = TRUE), "lightgreen")
iv_table <- add_cell_bar(iv_table, "IV_TS", range(all_frame$IV_TS, na.rm = TRUE, finite = TRUE), "pink")
iv_table <- add_cell_bar(iv_table, "NotMissing", c(0, 1), "lightgrey")
iv_table <- add_cell_bar(iv_table, "Levels", range(all_frame$Levels, na.rm = TRUE, finite = TRUE), "lightgrey")
# Number formats
iv_table <- formatRound(iv_table, columns = c("IV_Raw"), digits = 8)
iv_table <- formatRound(iv_table, columns = c("IV_TS"), digits = 8)
iv_table <- formatPercentage(iv_table, columns = c("NotMissing"), digits = 4)
# Display the widget
iv_table
# Build one interactive scatter plot; the six figures below share this layout.
#
# data          Data frame to plot; it must contain a `Feature` column, which
#               is used as the tooltip of every point.
# mapping       aes() mapping that supplies x, y, color and color2.
# title         Plot title.
# x_label       Title of the x axis.
# y_label       Title of the y axis.
# color_title   Title of the first colour-plane axis (the `color` aesthetic).
# color2_title  Title of the second colour-plane axis (the `color2` aesthetic).
# x_limits      Optional limits for the x axis; when supplied, the x axis is
#               drawn with a reversed scale.
# y_limits      Optional limits for the y axis; when supplied, the y axis is
#               drawn with a reversed scale.
#
# Returns the ggplot object.
build_iv_scatter <- function(data, mapping, title, x_label, y_label,
                             color_title, color2_title,
                             x_limits = NULL, y_limits = NULL) {
  double_size <- element_text(size = rel(2))
  scatter <- ggplot(data, mapping) +
    labs(title = title, x = x_label, y = y_label) +
    theme_bw() +
    geom_rug() +
    scale_color_colorplane(
      axis_title = color_title,
      axis_title_y = color2_title
    )
  if (!is.null(x_limits)) {
    scatter <- scatter + scale_x_reverse(limits = x_limits)
  }
  if (!is.null(y_limits)) {
    scatter <- scatter + scale_y_reverse(limits = y_limits)
  }
  scatter +
    theme(
      plot.title = element_text(size = rel(2), face = "bold"),
      axis.title.x = double_size,
      axis.title.y = double_size,
      axis.text.x = double_size,
      axis.text.y = double_size
    ) +
    geom_point_interactive(aes(tooltip = Feature), size = 2)
}

# Limits for the reversed rank axes, given as c(max, min).
rank_raw_limits <- rev(range(all_frame$RankIV_Raw))
rank_ts_limits <- rev(range(all_frame$RankIV_TS))

# 1. IV (TS) vs IV (raw), coloured by the two IV ranks.
plotting <- build_iv_scatter(
  all_frame,
  aes(x = IV_Raw, y = IV_TS, color = RankIV_Raw, color2 = RankIV_TS),
  title = "IV Time-Series vs IV Raw Data, Rank Multivariate",
  x_label = "Information Value (Raw Data)",
  y_label = "Information Value (Time-Series)",
  color_title = "IV Rank (Raw Data)",
  color2_title = "IV Rank (Time-Series)"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

# 2. IV rank (TS) vs IV rank (raw) on reversed axes, coloured by the two IVs.
plotting <- build_iv_scatter(
  all_frame,
  aes(x = RankIV_Raw, y = RankIV_TS, color = IV_Raw, color2 = IV_TS),
  title = "IV Rank Time-Series vs IV Rank Raw Data, Rank Multivariate",
  x_label = "Information Value Rank (Raw Data)",
  y_label = "Information Value Rank (Time-Series)",
  color_title = "IV (Raw Data)",
  color2_title = "IV (Time-Series)",
  x_limits = rank_raw_limits,
  y_limits = rank_ts_limits
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

# 3. IV (TS) vs IV (raw), coloured by Levels and NotMissing.
plotting <- build_iv_scatter(
  all_frame,
  aes(x = IV_Raw, y = IV_TS, color = Levels, color2 = NotMissing),
  title = "IV Time-Series vs IV Raw Data, Levels/Not-missing Multivariate",
  x_label = "Information Value (Raw Data)",
  y_label = "Information Value (Time-Series)",
  color_title = "Levels",
  color2_title = "Not-missing"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

# 4. IV rank (TS) vs IV rank (raw) on reversed axes, coloured by Levels and
#    NotMissing.
plotting <- build_iv_scatter(
  all_frame,
  aes(x = RankIV_Raw, y = RankIV_TS, color = Levels, color2 = NotMissing),
  title = "IV Rank Time-Series vs IV Rank Raw Data, Levels/Not-missing Multivariate",
  x_label = "Information Value Rank (Raw Data)",
  y_label = "Information Value Rank (Time-Series)",
  color_title = "Levels",
  color2_title = "Not-missing",
  x_limits = rank_raw_limits,
  y_limits = rank_ts_limits
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

# 5. NotMissing vs Levels, coloured by the two IVs.
plotting <- build_iv_scatter(
  all_frame,
  aes(x = Levels, y = NotMissing, color = IV_Raw, color2 = IV_TS),
  title = "Not-missing vs Levels, IV Time-Series / Raw Data Multivariate",
  x_label = "Levels",
  y_label = "Not-missing",
  color_title = "IV (Raw Data)",
  color2_title = "IV (Time-Series)"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)

# 6. NotMissing vs Levels, coloured by the two IV ranks.
plotting <- build_iv_scatter(
  all_frame,
  aes(x = Levels, y = NotMissing, color = RankIV_Raw, color2 = RankIV_TS),
  title = "Not-missing vs Levels, IV Rank Time-Series / Raw Data Multivariate",
  x_label = "Levels",
  y_label = "Not-missing",
  color_title = "IV Rank (Raw Data)",
  color2_title = "IV Rank (Time-Series)"
)
ggiraph(code = print(plotting), width_svg = 18, height_svg = 14, zoom_max = 5)